Download nearby venue data from the Foursquare API for each city

In [36]:
import pandas as pd
import requests

Define a function to get nearby places based on latitude and longitude

In [35]:
# Define a function that collects all the venues around each neighbourhood
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    """Fetch the venues near each neighbourhood from the Foursquare explore endpoint.

    One GET request is issued per (name, latitude, longitude) triple. The
    credentials and request options are read from the module-level names
    CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, which must be defined before
    the first request is made.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Neighbourhood labels and their coordinates. They are walked in
        lockstep with zip(), so iteration stops at the shortest of the three.
    radius : int, default 1000
        Sent as the ``radius`` query parameter (1000 corresponds to the
        1 km search radius used by this notebook).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        is empty, but still carries these columns, when no venue is found.
        'Venue Category' is None for a venue that lists no category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    url = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):

        # Let requests build and escape the query string. The full URL is
        # deliberately not printed: it contains the client secret and would
        # end up stored in the notebook output.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # A timeout keeps one stalled connection from hanging the whole
        # download; raise_for_status() surfaces quota/auth failures as a
        # clear HTTP error instead of a KeyError further down.
        reply = requests.get(url, params=params, timeout=30)
        reply.raise_for_status()

        groups = reply.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # Guard against venues with an empty category list, which would
            # otherwise raise IndexError on categories[0].
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor (rather than assigning them
    # afterwards) also works when no rows were collected.
    return pd.DataFrame(rows, columns=columns)

Storing API keys in this cell

In [51]:
# @hidden
# One [client ID, client secret] pair per Foursquare application; the download
# loop reads element 0 of a pair as the ID and element 1 as the secret.
# Every entry must be a two-element list: a bare string here would be indexed
# character by character by that loop.
# NOTE(review): these credentials are hardcoded in a shared notebook. They
# should be rotated and loaded from the environment (os.environ / getpass)
# instead of being committed.
FOURSQUARE_KEYS = [
    ['0ANT5D4J32NF4ZRXBNJGOUE1GHSYM01E34BALDNLVJVAMZIG', 'SXIAJWKCXVAZ32UJYYSD4DOTHIOJRUPHXIUJFEBSJHRMJ454'],
    ['JFIPVNQGSM3DJVNB4KPAFDWCS2AN5MLT0CUXMBDEEZ3TFTN1', 'O5MIDR0SDHPOMB4VRJRVJY2TV0YHYIYRKUUANCQKKN3UYOF4'],
    ['ONXQAZDGDXM0J2NWDIO15QUNYVAYXIP2GEAKGFDMOY01BPZ5', '15QX0C5ZDA2WTU0QRKARHH4SUTMRMAJFISIZAWSPOE4I0ITX'],
    ['0XBHK2D0ZYU2SGO2GFJHEEPBLBOI2N5HY33TFTHK5VYM1VPP', 'WOWHI5MO1M2FH2V3JTAHSAZRH4TE4YKA3HDOT5BUNHXFHL4U'],
    ['KS1TFYVJUHPCPMZOCWEYNLSAJNU5JH5WXHXW3PXPGCI2GLGI', 'AOW13VCPF2STWG0EV12QCIXZL0FPPRFZOAAGPHYRHZU0VL0U'],
]
In [33]:
# Cities whose neighbourhood venues we want to download
cities = ['delhi', 'mumbai', 'kolkata', 'chennai']

For each city we call the function getNearbyVenues() and get the venues in each neighborhood of a city within a radius of 1 km

In [50]:
# Request options shared by every city; getNearbyVenues() reads these
# module-level names, so they only need to be set once.
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # sent as the `limit` query parameter of each request

# zip() stops at the shorter sequence, so without this check any city lacking
# a key pair would be skipped without warning.
if len(FOURSQUARE_KEYS) < len(cities):
    raise ValueError('Need one Foursquare key pair per city: got {} pairs for {} cities'.format(
        len(FOURSQUARE_KEYS), len(cities)))

for city, key in zip(cities, FOURSQUARE_KEYS):

    # Reject malformed entries up front: indexing a bare string would silently
    # use its first two characters as the credentials.
    if isinstance(key, str) or len(key) != 2:
        raise ValueError('Foursquare key entry for {} is not a [client ID, client secret] pair'.format(city))

    # initializing foursquare API credentials (read as globals by getNearbyVenues)
    CLIENT_ID, CLIENT_SECRET = key # your Foursquare ID and Secret

    # The subdivision file is only read here, so no defensive copy is needed.
    data = pd.read_csv(city + '_subdiv.csv', index_col=0)
    venues = getNearbyVenues(names=data['Neighborhood'],
                             latitudes=data['Latitude'],
                             longitudes=data['Longitude'])
    venues.to_csv(city + '_venues.csv')
    print(city,'completed!')
delhi completed!
mumbai completed!
kolkata completed!
chennai completed!

Exploring the data downloaded from the API

Delhi

In [54]:
# Delhi: load the saved venues file and preview its first five rows
df = pd.read_csv('delhi_venues.csv', index_col=0)
df.head(5)
Out[54]:
Neighborhood Neighborhood Latitude Neighborhood Longitude Venue Venue Latitude Venue Longitude Venue Category Venue Summary Venue Type
0 Adarsh Nagar 28.720341 77.172661 Giani's 28.717900 77.173907 Ice Cream Shop This spot is popular general
1 Adarsh Nagar 28.720341 77.172661 Axis Bank ATM 28.723032 77.170631 ATM This spot is popular general
2 Adarsh Nagar 28.720341 77.172661 Adarsh Nagar Metro Station 28.716598 77.170436 Light Rail Station This spot is popular general
3 Adarsh Nagar 28.720341 77.172661 Vishyavidyalaya Metro Station@Entry gate #1 n ... 28.715596 77.170981 Train Station This spot is popular general
4 Adarsh Nagar 28.720341 77.172661 Pahalwan Dhaba 28.714594 77.172155 Indian Restaurant This spot is popular general
In [55]:
# Column dtypes, non-null counts and memory usage of the frame loaded in the previous cell (Delhi)
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3269 entries, 0 to 3268
Data columns (total 9 columns):
Neighborhood              3269 non-null object
Neighborhood Latitude     3269 non-null float64
Neighborhood Longitude    3269 non-null float64
Venue                     3269 non-null object
Venue Latitude            3269 non-null float64
Venue Longitude           3269 non-null float64
Venue Category            3269 non-null object
Venue Summary             3269 non-null object
Venue Type                3269 non-null object
dtypes: float64(4), object(5)
memory usage: 255.4+ KB

Mumbai

In [56]:
# Mumbai: load the saved venues file and preview its first five rows
df = pd.read_csv('mumbai_venues.csv', index_col=0)
df.head(5)
Out[56]:
Neighborhood Neighborhood Latitude Neighborhood Longitude Venue Venue Latitude Venue Longitude Venue Category
0 Amboli 19.129061 72.846451 Cafe Arfa 19.128930 72.847140 Indian Restaurant
1 Amboli 19.129061 72.846451 Shawarma Factory 19.124591 72.840398 Falafel Restaurant
2 Amboli 19.129061 72.846451 5 Spice , Bandra 19.130421 72.847206 Chinese Restaurant
3 Amboli 19.129061 72.846451 Jaffer Bhai's Delhi Darbar 19.137714 72.845909 Mughlai Restaurant
4 Amboli 19.129061 72.846451 Persia Darbar 19.136952 72.846822 Indian Restaurant
In [57]:
# Column dtypes, non-null counts and memory usage of the frame loaded in the previous cell (Mumbai)
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5533 entries, 0 to 5532
Data columns (total 7 columns):
Neighborhood              5533 non-null object
Neighborhood Latitude     5533 non-null float64
Neighborhood Longitude    5533 non-null float64
Venue                     5533 non-null object
Venue Latitude            5533 non-null float64
Venue Longitude           5533 non-null float64
Venue Category            5533 non-null object
dtypes: float64(4), object(3)
memory usage: 345.8+ KB

Kolkata

In [58]:
# Kolkata: load the saved venues file and preview its first five rows
df = pd.read_csv('kolkata_venues.csv', index_col=0)
df.head(5)
Out[58]:
Neighborhood Neighborhood Latitude Neighborhood Longitude Venue Venue Latitude Venue Longitude Venue Category Venue Summary Venue Type
0 Kalyani Municipality 22.570539 88.371239 Bhim Chandra Nag 22.570639 88.371524 Indian Sweet Shop This spot is popular general
1 Kalyani Municipality 22.570539 88.371239 Big Bazaar 22.565919 88.369635 Department Store This spot is popular general
2 Kalyani Municipality 22.570539 88.371239 Indian Coffee House 22.576187 88.364013 Café This spot is popular general
3 Kalyani Municipality 22.570539 88.371239 Paramount 22.573874 88.364496 Juice Bar This spot is popular general
4 Kalyani Municipality 22.570539 88.371239 Café Coffee Day 22.565919 88.369635 Café This spot is popular general
In [59]:
# Column dtypes, non-null counts and memory usage of the frame loaded in the previous cell (Kolkata)
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 303 entries, 0 to 302
Data columns (total 9 columns):
Neighborhood              303 non-null object
Neighborhood Latitude     303 non-null float64
Neighborhood Longitude    303 non-null float64
Venue                     303 non-null object
Venue Latitude            303 non-null float64
Venue Longitude           303 non-null float64
Venue Category            303 non-null object
Venue Summary             303 non-null object
Venue Type                303 non-null object
dtypes: float64(4), object(5)
memory usage: 23.7+ KB

Chennai

In [60]:
# Chennai: load the saved venues file and preview its first five rows
df = pd.read_csv('chennai_venues.csv', index_col=0)
df.head(5)
Out[60]:
Neighborhood Neighborhood Latitude Neighborhood Longitude Venue Venue Latitude Venue Longitude Venue Category
0 Red Hills 13.19543 80.184303 Hotel Balaji Bavan 13.193716 80.185292 Indian Restaurant
1 Red Hills 13.19543 80.184303 Radha Movie Park 13.193264 80.183417 Multiplex
2 Red Hills 13.19543 80.184303 Rock Gym 13.194852 80.186736 Gym
3 Red Hills 13.19543 80.184303 Redhills anna bus stand 13.192871 80.185003 Bus Station
4 Red Hills 13.19543 80.184303 Naturals 13.191494 80.185965 Spa
In [61]:
# Column dtypes, non-null counts and memory usage of the frame loaded in the previous cell (Chennai)
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2071 entries, 0 to 2070
Data columns (total 7 columns):
Neighborhood              2071 non-null object
Neighborhood Latitude     2071 non-null float64
Neighborhood Longitude    2071 non-null float64
Venue                     2071 non-null object
Venue Latitude            2071 non-null float64
Venue Longitude           2071 non-null float64
Venue Category            2071 non-null object
dtypes: float64(4), object(3)
memory usage: 129.4+ KB